(*
  Copyright (c) 2009 Barry Schwartz

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation
  files (the "Software"), to deal in the Software without
  restriction, including without limitation the rights to use,
  copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the
  Software is furnished to do so, subject to the following
  conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  OTHER DEALINGS IN THE SOFTWARE.
*)

open UTypes
open Box

(* catcodes and math-codes *)
(* Category code of an input character.  The default assignment for each
   character is given by the default_cat_codes_* pages in this file; the
   notes on the constructors describe those defaults only. *)
type cat_code =
  | Letter         (* A-Z, a-z and the non-ASCII letters of the default pages *)
  | Newline        (* character code 10 *)
  | Space          (* character codes 0 to 32, except code 10 *)
  | Escape         (* backslash, see escape_char *)
  | BeginGroup     (* left brace, see begin_group_char *)
  | EndGroup       (* right brace, see end_group_char *)
  | BeginOptional  (* left bracket, see begin_optional_char *)
  | EndOptional    (* right bracket, see end_optional_char *)
  | Macro          (* number sign, see macro_char *)
  | Comment        (* percent sign, see comment_char *)
  | Other          (* every character not covered by another case *)
  | EOF            (* result of cat_code for negative codes; not stored in
                      any default page *)

(* Character codes of the ASCII characters that receive a special category
   code in default_cat_codes_00.  Each value is the code of the character
   literal shown. *)
let macro_char = Char.code '#'
let comment_char = Char.code '%'
let begin_optional_char = Char.code '['
let escape_char = Char.code '\\'
let end_optional_char = Char.code ']'
let begin_group_char = Char.code '{'
let end_group_char = Char.code '}'
(* Generic page of 256 entries in which every character is Other.  It is
   used in cat_code_table for every page that has no dedicated table. *)
let default_cat_codes_xx = Array.make 256 Other
  
(* Default category codes for the code points U+0000..U+00FF (Basic Latin
   and Latin-1 Supplement); 256 entries indexed by code point.

     U+000A (line feed)                        Newline
     every other code up to U+0020 (space)     Space
     number sign                               Macro
     percent sign                              Comment
     left and right square bracket             BeginOptional, EndOptional
     backslash                                 Escape
     left and right curly bracket              BeginGroup, EndGroup
     A-Z and a-z                               Letter
     U+00C0..U+00FF, except the multiplication
       sign U+00D7 and division sign U+00F7    Letter
     everything else (digits, punctuation,
       U+007F..U+00BF, U+00D7, U+00F7)         Other *)
let default_cat_codes_00 =
  let classify code =
    match Char.chr code with
    (* The line feed must be tested before the Space range that contains it. *)
    | '\n' -> Newline
    | '\x00' .. ' ' -> Space
    | '#' -> Macro
    | '%' -> Comment
    | '[' -> BeginOptional
    | '\\' -> Escape
    | ']' -> EndOptional
    | '{' -> BeginGroup
    | '}' -> EndGroup
    | 'A' .. 'Z' | 'a' .. 'z' -> Letter
    | '\xC0' .. '\xD6' | '\xD8' .. '\xF6' | '\xF8' .. '\xFF' -> Letter
    | _ -> Other
  in
  Array.init 256 classify
  
(* The last entry of default_cat_codes_00 above (U+00FF) is latin small letter y with diaeresis. *)
(* Default category codes for the code points U+0100..U+01FF; 256 entries
   indexed by the low byte of the code point.

     U+0100..U+017F (Latin Extended-A, a with macron through long s)  Letter
     U+01FA..U+01FF (a with ring above and acute, ae with acute,
                     o with stroke and acute; capital and small)     Letter
     U+0180..U+01F9 (including the florin sign U+0192)                Other *)
let default_cat_codes_01 =
  let classify low =
    if low <= 0x7f || low >= 0xfa then Letter else Other
  in
  Array.init 256 classify
  
(* The last entry of default_cat_codes_01 above (U+01FF) is latin small letter o with stroke and acute. *)
(* Default category codes for the code points U+0300..U+03FF; 256 entries
   indexed by the low byte of the code point.

   Letter:
     U+0386           capital alpha with tonos
     U+0388..U+038A   capital epsilon, eta, iota with tonos
     U+038C           capital omicron with tonos
     U+038E..U+03A1   capital upsilon and omega with tonos, small iota with
                      dialytika and tonos, capital alpha through rho
     U+03A3..U+03CE   capital sigma through omega, the accented capitals,
                      and small alpha through omega with tonos

   Everything else is Other, including greek tonos (U+0384), dialytika
   tonos (U+0385), ano teleia (U+0387) and the gaps at U+038B, U+038D and
   U+03A2. *)
let default_cat_codes_03 =
  let between first last low = first <= low && low <= last in
  let is_letter low =
    low = 0x86
    || between 0x88 0x8a low
    || low = 0x8c
    || between 0x8e 0xa1 low
    || between 0xa3 0xce low
  in
  Array.init 256 (fun low -> if is_letter low then Letter else Other)
  
(* Default category codes for the code points U+1E00..U+1EFF; 256 entries
   indexed by the low byte of the code point.

     U+1E80..U+1E85   w with grave, acute and diaeresis (capital and small)  Letter
     U+1EF2..U+1EF3   y with grave (capital and small)                       Letter
     everything else                                                         Other *)
let default_cat_codes_1e =
  let between first last low = first <= low && low <= last in
  let classify low =
    if between 0x80 0x85 low || between 0xf2 0xf3 low then Letter else Other
  in
  Array.init 256 classify
  
(* Default category codes for the code points U+FB00..U+FBFF; 256 entries
   indexed by the low byte of the code point.

     U+FB00..U+FB06   latin small ligatures ff, fi, fl, ffi, ffl,
                      long s t and st                              Letter
     everything else                                               Other *)
let default_cat_codes_fb =
  Array.init 256 (fun low -> if low <= 0x06 then Letter else Other)
  
(* Table of default category codes, built by CharMap.build from 256 pages of
   256 entries each.  Pages 0x00, 0x01, 0x03, 0x1e and 0xfb have dedicated
   tables; every other slot holds the shared all-Other page
   default_cat_codes_xx.
   NOTE(review): the page index is presumably the high byte of the character
   code, as the table names suggest -- confirm against CharMap.build. *)
let cat_code_table =
  let page_for index =
    match index with
    | 0x00 -> default_cat_codes_00
    | 0x01 -> default_cat_codes_01
    | 0x03 -> default_cat_codes_03
    | 0x1e -> default_cat_codes_1e
    | 0xfb -> default_cat_codes_fb
    | _ -> default_cat_codes_xx
  in
  CharMap.build (Array.init 256 page_for)
  
(* [cat_code char] is the default category code of the character code
   [char]: EOF when [char] is negative, otherwise the result of looking
   [char] up in cat_code_table with CharMap.lookup. *)
let cat_code char =
  match char < 0 with
  | true -> EOF
  | false -> CharMap.lookup cat_code_table char
  

